1. Load Data on Model Results for all Country

Loading the final_results.csv to R enivronment:

HMD_BestResults <- read.csv("final_results.csv")
str(HMD_BestResults)
## 'data.frame':    456 obs. of  4 variables:
##  $ Country : chr  "AUS" "AUS" "AUS" "AUS" ...
##  $ Sex     : chr  "Female" "Female" "Female" "Female" ...
##  $ variable: chr  "preds" "preds_boost" "pred_LC_svd" "LCwithGAM" ...
##  $ mse     : num  0.0548 0.0478 0.0434 0.0756 0.0412 ...

Rename the variable column to Model and the mse to MSE. Rename preds to NN, preds_boost to NN_Boost, stacked to Stacked and pred_LC_svd to LC_SVD to have more clear names on the graphs and pivots.

colnames(HMD_BestResults)[3] <- "Model"
colnames(HMD_BestResults)[4] <- "MSE"
HMD_BestResults$Model[HMD_BestResults$Model == "preds"] <- "NN"
HMD_BestResults$Model[HMD_BestResults$Model == "preds_boost"] <- "NN_Boost"
HMD_BestResults$Model[HMD_BestResults$Model == "pred_LC_svd"] <- "LC_SVD"
HMD_BestResults$Model[HMD_BestResults$Model == "stacked"] <- "Stacked"
str(HMD_BestResults)
## 'data.frame':    456 obs. of  4 variables:
##  $ Country: chr  "AUS" "AUS" "AUS" "AUS" ...
##  $ Sex    : chr  "Female" "Female" "Female" "Female" ...
##  $ Model  : chr  "NN" "NN_Boost" "LC_SVD" "LCwithGAM" ...
##  $ MSE    : num  0.0548 0.0478 0.0434 0.0756 0.0412 ...

Load packages for analysis:

library(ggplot2)
library(tidyr)
library(stringr)
library(dplyr)

Define function to shape the HMD_BestResults to a one country - one row styled table.

myspread <- function(df, key, value) {
  # quote key
  keyq <- rlang::enquo(key)
  # break value vector into quotes
  valueq <- rlang::enquo(value)
  s <- rlang::quos(!!valueq)
  df %>% gather(variable, value, !!!s) %>%
    unite(temp, !!keyq, variable) %>%
    spread(temp, value)
}

Apply the function on HMD_BestResults and see the results:

HMD_BestResults <- HMD_BestResults %>% spread(Sex, MSE)
HMD_BestResults <- HMD_BestResults %>% myspread(Model, c(Female,Male))
HMD_BestResults <- HMD_BestResults[,c("Country",
                                      colnames(HMD_BestResults)[str_detect(colnames(HMD_BestResults), "_Male")],
                                      colnames(HMD_BestResults)[str_detect(colnames(HMD_BestResults), "_Female")])]
str(HMD_BestResults)
## 'data.frame':    38 obs. of  13 variables:
##  $ Country         : chr  "AUS" "AUT" "BEL" "BGR" ...
##  $ LC_SVD_Male     : num  0.0612 0.1017 0.0937 0.054 0.2347 ...
##  $ LCwithGAM_Male  : num  0.1159 0.0588 0.0774 0.0807 0.2646 ...
##  $ NN_Boost_Male   : num  0.0771 0.1174 0.0841 0.0632 0.1469 ...
##  $ NN_Male         : num  0.0995 0.1585 0.1122 0.0691 0.175 ...
##  $ RotatedLC_Male  : num  0.0599 0.0558 0.073 0.2121 0.3199 ...
##  $ Stacked_Male    : num  0.0708 0.0717 0.0735 0.1061 0.2313 ...
##  $ LC_SVD_Female   : num  0.0434 0.0823 0.0648 0.0725 0.1175 ...
##  $ LCwithGAM_Female: num  0.0756 0.0753 0.077 0.0776 0.281 ...
##  $ NN_Boost_Female : num  0.0478 0.0914 0.0723 0.0768 0.1084 ...
##  $ NN_Female       : num  0.0548 0.1042 0.0789 0.0889 0.1227 ...
##  $ RotatedLC_Female: num  0.0412 0.0795 0.066 0.1051 0.3553 ...
##  $ Stacked_Female  : num  0.0424 0.0751 0.0665 0.071 0.2327 ...

2. Get Best Models and Generate New Variables

Get best model for females:

HMD_BestResults$BestFemaleMSE <- apply(HMD_BestResults[,8:13], 1, FUN = min)


HMD_BestResults$BestFemaleModel <- NA
for (i in 1:nrow(HMD_BestResults)) {
  HMD_BestResults$BestFemaleModel[i] <- names(HMD_BestResults[,2:13])[which(
    HMD_BestResults[,2:13] == HMD_BestResults$BestFemaleMSE[i],
    arr.ind=T)[, "col"]]
}

str(HMD_BestResults)
## 'data.frame':    38 obs. of  15 variables:
##  $ Country         : chr  "AUS" "AUT" "BEL" "BGR" ...
##  $ LC_SVD_Male     : num  0.0612 0.1017 0.0937 0.054 0.2347 ...
##  $ LCwithGAM_Male  : num  0.1159 0.0588 0.0774 0.0807 0.2646 ...
##  $ NN_Boost_Male   : num  0.0771 0.1174 0.0841 0.0632 0.1469 ...
##  $ NN_Male         : num  0.0995 0.1585 0.1122 0.0691 0.175 ...
##  $ RotatedLC_Male  : num  0.0599 0.0558 0.073 0.2121 0.3199 ...
##  $ Stacked_Male    : num  0.0708 0.0717 0.0735 0.1061 0.2313 ...
##  $ LC_SVD_Female   : num  0.0434 0.0823 0.0648 0.0725 0.1175 ...
##  $ LCwithGAM_Female: num  0.0756 0.0753 0.077 0.0776 0.281 ...
##  $ NN_Boost_Female : num  0.0478 0.0914 0.0723 0.0768 0.1084 ...
##  $ NN_Female       : num  0.0548 0.1042 0.0789 0.0889 0.1227 ...
##  $ RotatedLC_Female: num  0.0412 0.0795 0.066 0.1051 0.3553 ...
##  $ Stacked_Female  : num  0.0424 0.0751 0.0665 0.071 0.2327 ...
##  $ BestFemaleMSE   : num  0.0412 0.0751 0.0648 0.071 0.1084 ...
##  $ BestFemaleModel : chr  "RotatedLC_Female" "Stacked_Female" "LC_SVD_Female" "Stacked_Female" ...

Get best model for males:

HMD_BestResults$BestMaleMSE <- apply(HMD_BestResults[,2:7], 1, FUN = min)


HMD_BestResults$BestMaleModel <- NA
for (i in 1:nrow(HMD_BestResults)) {
  HMD_BestResults$BestMaleModel[i] <- names(HMD_BestResults[,1:9])[which(
    HMD_BestResults[,1:9] == HMD_BestResults$BestMaleMSE[i],
    arr.ind=T)[, "col"]]
}

str(HMD_BestResults)
## 'data.frame':    38 obs. of  17 variables:
##  $ Country         : chr  "AUS" "AUT" "BEL" "BGR" ...
##  $ LC_SVD_Male     : num  0.0612 0.1017 0.0937 0.054 0.2347 ...
##  $ LCwithGAM_Male  : num  0.1159 0.0588 0.0774 0.0807 0.2646 ...
##  $ NN_Boost_Male   : num  0.0771 0.1174 0.0841 0.0632 0.1469 ...
##  $ NN_Male         : num  0.0995 0.1585 0.1122 0.0691 0.175 ...
##  $ RotatedLC_Male  : num  0.0599 0.0558 0.073 0.2121 0.3199 ...
##  $ Stacked_Male    : num  0.0708 0.0717 0.0735 0.1061 0.2313 ...
##  $ LC_SVD_Female   : num  0.0434 0.0823 0.0648 0.0725 0.1175 ...
##  $ LCwithGAM_Female: num  0.0756 0.0753 0.077 0.0776 0.281 ...
##  $ NN_Boost_Female : num  0.0478 0.0914 0.0723 0.0768 0.1084 ...
##  $ NN_Female       : num  0.0548 0.1042 0.0789 0.0889 0.1227 ...
##  $ RotatedLC_Female: num  0.0412 0.0795 0.066 0.1051 0.3553 ...
##  $ Stacked_Female  : num  0.0424 0.0751 0.0665 0.071 0.2327 ...
##  $ BestFemaleMSE   : num  0.0412 0.0751 0.0648 0.071 0.1084 ...
##  $ BestFemaleModel : chr  "RotatedLC_Female" "Stacked_Female" "LC_SVD_Female" "Stacked_Female" ...
##  $ BestMaleMSE     : num  0.0599 0.0558 0.073 0.054 0.1469 ...
##  $ BestMaleModel   : chr  "RotatedLC_Male" "RotatedLC_Male" "RotatedLC_Male" "LC_SVD_Male" ...

See the best model frequencies:

table(HMD_BestResults$BestFemaleModel)
## 
##    LC_SVD_Female LCwithGAM_Female  NN_Boost_Female RotatedLC_Female 
##                7                4                9                7 
##   Stacked_Female 
##               11
table(HMD_BestResults$BestMaleModel)
## 
##    LC_SVD_Male LCwithGAM_Male  NN_Boost_Male RotatedLC_Male   Stacked_Male 
##              3             14              7              9              5

Looks like NN_Boost and Stacked are the best option for females. And the LCwithGAM for males.

Let’s check the NN_Boost advantage to the simple NN solution in each country for both genders:

temp_df <- reshape2::melt(HMD_BestResults[,c("Country", "NN_Male", "NN_Boost_Male")],
                            id.var = "Country")

ggplot(temp_df, aes(x = Country, y = value, color = variable, group=variable)) +
  geom_line()

temp_df <- reshape2::melt(HMD_BestResults[,c("Country", "NN_Female", "NN_Boost_Female")],
                            id.var = "Country")

ggplot(temp_df, aes(x = Country, y = value, color = variable, group=variable)) +
  geom_line()

It seems that the \(MSE\) of NN_Boost is always better or about the same as the \(MSE\) of the simple NN.

So, next we generate a variable that measures the NN_boost advantage in \(MSE\) compared to the best not NN model.

HMD_BestResults$BestMaleMSE_notNN <- apply(HMD_BestResults[,c(2,3,6,7)], 1, FUN = min)
HMD_BestResults$NN_boost_vs_Best_notNN_male <- HMD_BestResults$NN_Boost_Male - HMD_BestResults$BestMaleMSE_notNN

HMD_BestResults$BestFemaleMSE_notNN <- apply(HMD_BestResults[,c(8,9,12,13)], 1, FUN = min)
HMD_BestResults$NN_boost_vs_Best_notNN_female <- HMD_BestResults$NN_Boost_Female - HMD_BestResults$BestFemaleMSE_notNN

str(HMD_BestResults)
## 'data.frame':    38 obs. of  21 variables:
##  $ Country                      : chr  "AUS" "AUT" "BEL" "BGR" ...
##  $ LC_SVD_Male                  : num  0.0612 0.1017 0.0937 0.054 0.2347 ...
##  $ LCwithGAM_Male               : num  0.1159 0.0588 0.0774 0.0807 0.2646 ...
##  $ NN_Boost_Male                : num  0.0771 0.1174 0.0841 0.0632 0.1469 ...
##  $ NN_Male                      : num  0.0995 0.1585 0.1122 0.0691 0.175 ...
##  $ RotatedLC_Male               : num  0.0599 0.0558 0.073 0.2121 0.3199 ...
##  $ Stacked_Male                 : num  0.0708 0.0717 0.0735 0.1061 0.2313 ...
##  $ LC_SVD_Female                : num  0.0434 0.0823 0.0648 0.0725 0.1175 ...
##  $ LCwithGAM_Female             : num  0.0756 0.0753 0.077 0.0776 0.281 ...
##  $ NN_Boost_Female              : num  0.0478 0.0914 0.0723 0.0768 0.1084 ...
##  $ NN_Female                    : num  0.0548 0.1042 0.0789 0.0889 0.1227 ...
##  $ RotatedLC_Female             : num  0.0412 0.0795 0.066 0.1051 0.3553 ...
##  $ Stacked_Female               : num  0.0424 0.0751 0.0665 0.071 0.2327 ...
##  $ BestFemaleMSE                : num  0.0412 0.0751 0.0648 0.071 0.1084 ...
##  $ BestFemaleModel              : chr  "RotatedLC_Female" "Stacked_Female" "LC_SVD_Female" "Stacked_Female" ...
##  $ BestMaleMSE                  : num  0.0599 0.0558 0.073 0.054 0.1469 ...
##  $ BestMaleModel                : chr  "RotatedLC_Male" "RotatedLC_Male" "RotatedLC_Male" "LC_SVD_Male" ...
##  $ BestMaleMSE_notNN            : num  0.0599 0.0558 0.073 0.054 0.2313 ...
##  $ NN_boost_vs_Best_notNN_male  : num  0.01715 0.06162 0.01114 0.00921 -0.08444 ...
##  $ BestFemaleMSE_notNN          : num  0.0412 0.0751 0.0648 0.071 0.1175 ...
##  $ NN_boost_vs_Best_notNN_female: num  0.00667 0.01623 0.00754 0.00577 -0.00905 ...

3. Draw Some Maps

See the best models in each country on the world map!

First, download the shapefile:

# Download the shapefile.
download.file("http://thematicmapping.org/downloads/TM_WORLD_BORDERS_SIMPL-0.3.zip" , destfile="GeoData/world_shape_file.zip")

# Unzip the file.
system("unzip GeoData/world_shape_file.zip")

Load the shapefile to R:

library(rgdal)

# Rread the shape file
world_spdf <- readOGR( 
  dsn= paste0(getwd(),"/GeoData") , 
  layer="TM_WORLD_BORDERS_SIMPL-0.3",
  verbose=FALSE
)

# Save the original order of countries so that we don't mess up the shapefile with the merge
world_spdf@data$OriginalOrder <- 1:nrow(world_spdf@data)

Cleaning the country codes in the HMD_BestResults dataframe:

# Creating the dataframe to join
HMD_ResultsToMap <- HMD_BestResults[,c("Country", "BestFemaleModel", "BestMaleModel",
                                       "NN_boost_vs_Best_notNN_male", "NN_boost_vs_Best_notNN_female")]

# Unify some country codes
HMD_ResultsToMap$Country[HMD_ResultsToMap$Country == "FRATNP"] <- "FRA"
HMD_ResultsToMap$Country[HMD_ResultsToMap$Country == "DEUTW"] <- "DEU"
HMD_ResultsToMap$Country[HMD_ResultsToMap$Country == "NZL_NP"] <- "NZL"
HMD_ResultsToMap$Country[HMD_ResultsToMap$Country == "GBR_NP"] <- "GBR"

Join with relevant variables from HMD_ResultsToMap:

ShapeDataTemp <- merge(x=world_spdf@data, y=HMD_ResultsToMap, by.x="ISO3", by.y="Country", all.x=TRUE)

# Clean up the categorical variables and convert them to factor
ShapeDataTemp$BestMaleModel[is.na(ShapeDataTemp$BestMaleModel)] <- "No_Data"
ShapeDataTemp$BestFemaleModel[is.na(ShapeDataTemp$BestFemaleModel)] <- "No_Data"
ShapeDataTemp$BestFemaleModel <- as.factor(ShapeDataTemp$BestFemaleModel)
ShapeDataTemp$BestMaleModel <- as.factor(ShapeDataTemp$BestMaleModel)

world_spdf@data <- ShapeDataTemp[order(ShapeDataTemp$OriginalOrder),]

Draw the map of the best models for females!

library(leaflet)

# Create a color palette for the map
mypalette <- colorFactor(c("green", "blue", "orange", "grey", "red", "yellow"),
                         world_spdf@data$BestFemaleModel)

# Prepare the text for tooltips
mytext <- paste(
    "Country: ", world_spdf@data$NAME,"<br/>", 
    "Best Model Female: ", world_spdf@data$BestFemaleModel, "<br/>", 
    "NN_boost_vs_Best_notNN_female: ", round(world_spdf@data$NN_boost_vs_Best_notNN_female, 3), 
    sep="") %>%
  lapply(htmltools::HTML)

# Create map object
m <- leaflet(world_spdf) %>% 
  addTiles()  %>% 
  setView( lat=10, lng=0 , zoom=2) %>%
  addPolygons( 
    fillColor = ~mypalette(BestFemaleModel), 
    stroke=FALSE,
    label = mytext,
    labelOptions = labelOptions( 
      style = list("font-weight" = "normal", padding = "3px 8px"), 
      textsize = "13px", 
      direction = "auto"
    )
  ) %>%
  addLegend(pal=mypalette, values=~BestFemaleModel, opacity=0.9, title = "Best Model for Females", position = "bottomleft")

# Show map object
m
# Save map in a html file
htmlwidgets::saveWidget(m, file="BestFemaleModel.html")

Draw the map of the best models for males!

library(leaflet)

# Create a color palette for the map
mypalette <- colorFactor(c("green", "blue", "orange", "grey", "red", "yellow"),
                         world_spdf@data$BestMaleModel)

# Prepare the text for tooltips
mytext <- paste(
    "Country: ", world_spdf@data$NAME,"<br/>", 
    "Best Model Male: ", world_spdf@data$BestMaleModel, "<br/>", 
    "NN_boost_vs_Best_notNN_male: ", round(world_spdf@data$NN_boost_vs_Best_notNN_male, 3), 
    sep="") %>%
  lapply(htmltools::HTML)

# Create map object
m <- leaflet(world_spdf) %>% 
  addTiles()  %>% 
  setView( lat=10, lng=0 , zoom=2) %>%
  addPolygons( 
    fillColor = ~mypalette(BestMaleModel), 
    stroke=FALSE,
    label = mytext,
    labelOptions = labelOptions( 
      style = list("font-weight" = "normal", padding = "3px 8px"), 
      textsize = "13px", 
      direction = "auto"
    )
  ) %>%
  addLegend(pal=mypalette, values=~BestMaleModel, opacity=0.9, title = "Best Model for Males", position = "bottomleft")

# Show map object
m
# Save map in a html file
htmlwidgets::saveWidget(m, file="BestMaleModel.html")

Draw the map of NN_boost advantage in \(MSE\) compared to the best not NN model for females!

library(leaflet)

# Set the differences on a logaritmic scale
world_spdf@data$Log_NN_boost_vs_Best_notNN_female <- log(world_spdf@data$NN_boost_vs_Best_notNN_female - (min(world_spdf@data$NN_boost_vs_Best_notNN_female, na.rm = TRUE)-0.09))

# Create a color palette for the map
mypalette <- colorNumeric(palette = "RdYlGn",
                          domain = world_spdf@data$Log_NN_boost_vs_Best_notNN_female,
                          na.color="transparent",
                          reverse = TRUE)

# Prepare the text for tooltips
mytext <- paste(
    "Country: ", world_spdf@data$NAME,"<br/>", 
    "Best Model Female: ", world_spdf@data$BestMaleFemodel, "<br/>", 
    "Log_NN_boost_vs_Best_notNN_female: ", round(world_spdf@data$Log_NN_boost_vs_Best_notNN_female, 3), 
    sep="") %>%
  lapply(htmltools::HTML)

# Create map object
m <- leaflet(world_spdf) %>% 
  addTiles()  %>% 
  setView( lat=10, lng=0 , zoom=2) %>%
  addPolygons( 
    fillColor = ~mypalette(Log_NN_boost_vs_Best_notNN_female), 
    stroke=TRUE,
    fillOpacity = 0.9, 
    color="white", 
    weight=0.3,
    label = mytext,
    labelOptions = labelOptions( 
      style = list("font-weight" = "normal", padding = "3px 8px"), 
      textsize = "13px", 
      direction = "auto"
    )
  ) %>%
  addLegend(pal=mypalette, values=~Log_NN_boost_vs_Best_notNN_female, opacity=0.9, title = "Log_NN_boost advantage in MSE - Female", position = "bottomleft")

# Show map object
m
# Save map in a html file
htmlwidgets::saveWidget(m, file="NNboostAdvantageFemale.html")

Draw the map of NN_boost advantage in \(MSE\) compared to the best not NN model for males!

library(leaflet)

# Set the differences on a logaritmic scale
world_spdf@data$Log_NN_boost_vs_Best_notNN_male <- log(world_spdf@data$NN_boost_vs_Best_notNN_male - (min(world_spdf@data$NN_boost_vs_Best_notNN_male, na.rm = TRUE)-0.09))

# Create a color palette for the map
mypalette <- colorNumeric(palette = "RdYlGn",
                          domain = world_spdf@data$Log_NN_boost_vs_Best_notNN_male,
                          na.color="transparent",
                          reverse = TRUE)

# Prepare the text for tooltips
mytext <- paste(
    "Country: ", world_spdf@data$NAME,"<br/>", 
    "Best Model male: ", world_spdf@data$BestMaleFemodel, "<br/>", 
    "Log_NN_boost_vs_Best_notNN_male: ", round(world_spdf@data$Log_NN_boost_vs_Best_notNN_male, 3), 
    sep="") %>%
  lapply(htmltools::HTML)

# Create map object
m <- leaflet(world_spdf) %>% 
  addTiles()  %>% 
  setView( lat=10, lng=0 , zoom=2) %>%
  addPolygons( 
    fillColor = ~mypalette(Log_NN_boost_vs_Best_notNN_male), 
    stroke=TRUE,
    fillOpacity = 0.9, 
    color="white", 
    weight=0.3,
    label = mytext,
    labelOptions = labelOptions( 
      style = list("font-weight" = "normal", padding = "3px 8px"), 
      textsize = "13px", 
      direction = "auto"
    )
  ) %>%
  addLegend(pal=mypalette, values=~Log_NN_boost_vs_Best_notNN_male, opacity=0.9, title = "Log_NN_boost advantage in MSE - male", position = "bottomleft")

# Show map object
m
# Save map in a html file
htmlwidgets::saveWidget(m, file="NNboostAdvantageMale.html")

4. Calculate Moran’s I for the NN_Boost Advantage

Load packages for the spatial autocorrelation analysis:

library(sf)
library(spdep)

Load the shapefile of world countries with the sf package:

s <- st_read( "GeoData/TM_WORLD_BORDERS_SIMPL-0.3.shp")
## Reading layer `TM_WORLD_BORDERS_SIMPL-0.3' from data source 
##   `E:\Egyetem Phd\8. félév\LongevityRisk+NN\GeoData\TM_WORLD_BORDERS_SIMPL-0.3.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 246 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -90 xmax: 180 ymax: 83.57027
## Geodetic CRS:  WGS 84

Add the NN_boost_vs_Best_notNN_male and NN_boost_vs_Best_notNN_female variables to the s shape data.

ShapeDataTemp <- merge(x=s, y=HMD_ResultsToMap[,c("Country",
                                                  "NN_boost_vs_Best_notNN_male",
                                                  "NN_boost_vs_Best_notNN_female")],
                       by.x="ISO3", by.y="Country", all.x=TRUE)
s <- ShapeDataTemp
str(s)
## Classes 'sf' and 'data.frame':   246 obs. of  14 variables:
##  $ ISO3                         : chr  "ABW" "AFG" "AGO" "AIA" ...
##  $ FIPS                         : chr  "AA" "AF" "AO" "AV" ...
##  $ ISO2                         : chr  "AW" "AF" "AO" "AI" ...
##  $ UN                           : int  533 4 24 660 248 8 20 530 784 32 ...
##  $ NAME                         : chr  "Aruba" "Afghanistan" "Angola" "Anguilla" ...
##  $ AREA                         : int  0 65209 124670 0 0 2740 0 80 8360 273669 ...
##  $ POP2005                      : num  102897 25067407 16095214 12256 0 ...
##  $ REGION                       : int  19 142 2 19 150 150 150 19 142 19 ...
##  $ SUBREGION                    : int  29 34 17 29 154 39 39 29 145 5 ...
##  $ LON                          : num  -70 65.2 17.5 -63 20 ...
##  $ LAT                          : num  12.5 33.7 -12.3 18.2 60.2 ...
##  $ NN_boost_vs_Best_notNN_male  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ NN_boost_vs_Best_notNN_female: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ geometry                     :sfc_MULTIPOLYGON of length 246; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:4, 1:2] -69.9 -70.1 -70.1 -69.9 12.4 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:13] "ISO3" "FIPS" "ISO2" "UN" ...

Ok, we have the two new variables.

Let’s define “neighboring” polygons.

sf::sf_use_s2(FALSE)
#s <- na.omit(s)
nb <- poly2nb(s, queen=TRUE)

Each neighboring polygon will be assigned equal weight when computing the neighboring NN advantages in \(MSE\).

lw <- nb2listw(nb, style="W", zero.policy=TRUE)

Computing the Moran’s \(I\) statistic for Females.

NN_boost_vs_Best_notNN_female_Lag <- lag.listw(lw, s$NN_boost_vs_Best_notNN_female,
                                               zero.policy = FALSE, NAOK = TRUE)

plot(NN_boost_vs_Best_notNN_female_Lag ~ s$NN_boost_vs_Best_notNN_female, pch=16, asp=1)
MoranOLS <- lm(NN_boost_vs_Best_notNN_female_Lag ~ s$NN_boost_vs_Best_notNN_female)
abline(MoranOLS, col="blue")

summary(MoranOLS)
## 
## Call:
## lm(formula = NN_boost_vs_Best_notNN_female_Lag ~ s$NN_boost_vs_Best_notNN_female)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.037878 -0.017439 -0.009258  0.006185  0.140546 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                      0.01060    0.01001   1.059    0.305  
## s$NN_boost_vs_Best_notNN_female  0.51415    0.20085   2.560    0.021 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04015 on 16 degrees of freedom
##   (228 observations deleted due to missingness)
## Multiple R-squared:  0.2906, Adjusted R-squared:  0.2462 
## F-statistic: 6.553 on 1 and 16 DF,  p-value: 0.02098

Ok, we have some moderate spatial autocorrelation (\(0.51\)) for females. The autpcorrelation is significant at \(\alpha=0.05\), but not at \(\alpha=0.01\) level.

Computing the Moran’s \(I\) statistic for Males.

NN_boost_vs_Best_notNN_male_Lag <- lag.listw(lw, s$NN_boost_vs_Best_notNN_male,
                                               zero.policy = FALSE, NAOK = TRUE)

plot(NN_boost_vs_Best_notNN_male_Lag ~ s$NN_boost_vs_Best_notNN_male, pch=16, asp=1)
MoranOLS <- lm(NN_boost_vs_Best_notNN_male_Lag ~ s$NN_boost_vs_Best_notNN_male)
abline(MoranOLS, col="blue")

summary(MoranOLS)
## 
## Call:
## lm(formula = NN_boost_vs_Best_notNN_male_Lag ~ s$NN_boost_vs_Best_notNN_male)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.07614 -0.01560 -0.00134  0.01225  0.11091 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                   0.007235   0.010088   0.717   0.4836  
## s$NN_boost_vs_Best_notNN_male 0.516516   0.180900   2.855   0.0115 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03816 on 16 degrees of freedom
##   (228 observations deleted due to missingness)
## Multiple R-squared:  0.3375, Adjusted R-squared:  0.2961 
## F-statistic: 8.153 on 1 and 16 DF,  p-value: 0.01146

Ok, we have some moderate spatial autocorrelation (\(0.52\)) for males. The autpcorrelation is significant at \(\alpha=0.05\), but not at \(\alpha=0.01\) level.

The level of spatial autocorrelation is similar for males and for females, but for males it is seems that some outlier effects are more responsible.